#!/usr/bin/python
# -*- coding: utf-8 -*-

import urllib
import urllib2
import random
import re
import time
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf8')

# Default HTTP headers: a browser-like User-Agent so the scraped sites do not
# reject the requests as coming from an obvious bot.
headers = {"User-Agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"}

# Check whether a proxy IP is usable.
def checkAgentIp( protocol, ip, port ):
    """Return True if the proxy ``protocol://ip:port`` can fetch baidu.com.

    protocol -- proxy scheme as a lowercase string, e.g. "http" or "https"
    ip, port -- proxy address; both are coerced with str() before use

    The proxy is considered alive only when the request succeeds within the
    3-second timeout AND the response body contains the literal text
    "baidu.com". Any exception (connect error, timeout, ...) yields False.
    """
    try:
        url = "http://www.baidu.com"
        proxies = {protocol: protocol + "://" + str(ip) + ":" + str(port)}

        proxy_handler = urllib2.ProxyHandler(proxies)
        opener = urllib2.build_opener(proxy_handler)
        # Use the opener directly instead of urllib2.install_opener():
        # installing it would silently change the process-wide default
        # opener for every later urllib2.urlopen() call.
        request = urllib2.Request(url, headers=headers)
        response = opener.open(request, timeout = 3)
        try:
            content = response.read()
        finally:
            # Always release the connection, even if read() raises.
            response.close()

    except Exception:
        # Unreachable proxy, timeout, refused connection, ... => unusable.
        return False

    # Escape the dot so we match the literal domain "baidu.com" and not
    # arbitrary strings like "baiduXcom".
    return re.search(r'baidu\.com', content) is not None

def getContent( url ):
    try:
        #user_agent = getUserAgent()
        headers = {"User-Agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"}



        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request, timeout = 5)
        content = response.read()
        print content
    except Exception, e:
        print e
        exit()

    soup = BeautifulSoup(content, "lxml")
    ip_list = soup.select("#ip_list tr")
    sql = "insert into agent_ip (ip, port) values "

    for item in ip_list:
        #判断是否有对应的内容
        if item.select(".country div.bar"):
            ip = item.select("td")[1].get_text()
            port = item.select("td")[2].get_text()
            protocol = item.select("td")[5].get_text().lower()

            #判断代理的IP是否可用
            if checkAgentIp( protocol, ip, port ):
                print protocol+"://"+ip+":"+port+"可用"
            else:
                print protocol+"://"+ip+":"+port+"不可用"


# Entry point: scrape the first "high anonymity" listing page. Guarded so
# importing this module for checkAgentIp/getContent does not trigger a crawl.
if __name__ == "__main__":
    url = "http://www.xicidaili.com/nn/"
    getContent( url )